﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace WorldLab.SampleAnalysisLib
{
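    /// <summary>
    /// Computes per-value counts, weight sums and weights-of-evidence measures for a list of samples.
    /// </summary>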
    public class SampleStatistic
    {
        /// <summary>
        /// Statistics recorded for one distinct value of an independent variable.
        /// The type is a plain data holder; the values are filled in by the code that creates it.
        /// </summary>
        public class UnitStatistic
        {
            /// <summary>Index of the independent variable.</summary>
            public int Index { get; set; }

            /// <summary>Value of the independent variable described by this entry.</summary>
            public double IndepValue { get; set; }

            /// <summary>Number of samples counted for <see cref="IndepValue"/>.</summary>
            public int SampleCountsEquValue { get; set; }

            /// <summary>Weight sum of the samples counted for <see cref="IndepValue"/>.</summary>
            public double WeightSumEquValue { get; set; }

            /// <summary>Sample count restricted to samples whose dependent value is 1.</summary>
            public int SampleCountsEquValueDepEqu1 { get; set; }

            /// <summary>Weight sum restricted to samples whose dependent value is 1.</summary>
            public double WeightSumEquValueDepEqu1 { get; set; }

            /// <summary>Accumulated version of <see cref="SampleCountsEquValue"/>.</summary>
            public int AccuSampleCountsEquValue { get; set; }

            /// <summary>Accumulated version of <see cref="WeightSumEquValue"/>.</summary>
            public double AccuWeightEquValue { get; set; }

            /// <summary>Accumulated version of <see cref="SampleCountsEquValueDepEqu1"/>.</summary>
            public int AccuSampleCountsEquValueDepEqu1 { get; set; }

            /// <summary>Accumulated version of <see cref="WeightSumEquValueDepEqu1"/>.</summary>
            public double AccuWeightEquValueDepEqu1 { get; set; }

            /// <summary>Positive weight (the property keeps its historical spelling).</summary>
            public double PossiveWeight { get; set; }

            /// <summary>Negative weight.</summary>
            public double NegativeWeight { get; set; }

            /// <summary>Variance of the positive weight.</summary>
            public double VarianceOfPosWei { get; set; }

            /// <summary>Variance of the negative weight.</summary>
            public double VarianceOfNegWei { get; set; }

            /// <summary>Difference of the weights.</summary>
            public double DifferenceOfWei { get; set; }

            /// <summary>T statistic.</summary>
            public double TStatistic { get; set; }

            /// <summary>
            /// Creates an entry whose counters are 0 and whose floating-point members are all NaN.
            /// <see cref="Index"/> is left at its default of 0.
            /// </summary>
            public UnitStatistic()
            {
                // The four counters need no assignment: int properties already start at 0.
                IndepValue = double.NaN;

                WeightSumEquValue = double.NaN;
                WeightSumEquValueDepEqu1 = double.NaN;
                AccuWeightEquValue = double.NaN;
                AccuWeightEquValueDepEqu1 = double.NaN;

                PossiveWeight = double.NaN;
                NegativeWeight = double.NaN;
                VarianceOfPosWei = double.NaN;
                VarianceOfNegWei = double.NaN;
                DifferenceOfWei = double.NaN;
                TStatistic = double.NaN;
            }

            /// <summary>
            /// Creates an entry from the per-value counts and weight sums.
            /// Every member not covered by a parameter keeps its default value (0 or 0.0, not NaN).
            /// </summary>
            /// <param name="idx">Index of the independent variable.</param>
            /// <param name="val">Value of the independent variable.</param>
            /// <param name="cntsEquValue">Sample count for the value.</param>
            /// <param name="weiEquValue">Weight sum for the value.</param>
            /// <param name="cntsEquValueDepEqu1">Sample count for the value with dependent value 1.</param>
            /// <param name="weiEquValueDepEqu1">Weight sum for the value with dependent value 1.</param>
            public UnitStatistic(int idx, double val, int cntsEquValue, double weiEquValue,
                int cntsEquValueDepEqu1, double weiEquValueDepEqu1)
            {
                Index = idx;
                IndepValue = val;
                SampleCountsEquValue = cntsEquValue;
                WeightSumEquValue = weiEquValue;
                SampleCountsEquValueDepEqu1 = cntsEquValueDepEqu1;
                WeightSumEquValueDepEqu1 = weiEquValueDepEqu1;
            }
        }

        /// <summary>
        /// Statistics of one independent variable: the variable's index together with a list of
        /// <see cref="UnitStatistic"/> entries.
        /// </summary>
        public class IndepStatistic
        {
            private int index;
            private List<UnitStatistic> unitStatisList;

            /// <summary>Index of the independent variable.</summary>
            public int Index
            {
                get { return index; }
                set { index = value; }
            }

            /// <summary>Unit statistics of the variable; <c>null</c> once <see cref="Dispose"/> has run.</summary>
            public List<UnitStatistic> UnitStatisList
            {
                get { return unitStatisList; }
                set { unitStatisList = value; }
            }

            /// <summary>Creates the statistic. The list is stored by reference, not copied.</summary>
            /// <param name="idx">Index of the independent variable.</param>
            /// <param name="unitStaList">Unit statistics belonging to the variable.</param>
            public IndepStatistic(int idx, List<UnitStatistic> unitStaList)
            {
                index = idx;
                unitStatisList = unitStaList;
            }

            /// <summary>
            /// Empties the unit list and releases the reference to it. Calling it again is a no-op.
            /// </summary>
            public void Dispose()
            {
                // The former per-element loop only nulled a local variable and had no effect,
                // so it is gone. Clear() still empties the very list instance that was handed in.
                if (unitStatisList == null)
                {
                    return;
                }
                unitStatisList.Clear();
                unitStatisList = null;
            }

            /// <summary>
            /// Finds the unit statistic with the largest <see cref="UnitStatistic.TStatistic"/>,
            /// ignoring entries whose T statistic is NaN.
            /// </summary>
            /// <returns>
            /// The first entry holding the maximum, or <c>null</c> when the list is missing, empty,
            /// or holds only NaN T statistics.
            /// </returns>
            public UnitStatistic FindUnitStatisOfMaxTSta()
            {
                if (unitStatisList == null)
                {
                    return null;
                }
                UnitStatistic best = null;
                foreach (UnitStatistic unit in unitStatisList)
                {
                    if (double.IsNaN(unit.TStatistic))
                    {
                        continue;
                    }
                    // Strict '>' keeps the earliest entry when several share the maximum.
                    if (best == null || unit.TStatistic > best.TStatistic)
                    {
                        best = unit;
                    }
                }
                return best;
            }

            /// <summary>
            /// Appends one space-separated text line per unit statistic to the given file.
            /// Each line starts with <see cref="Index"/> followed by the entry's members.
            /// </summary>
            /// <param name="txtFileName">Path of the file to append to; it is created when missing.</param>
            public void OutputStatisticResult(string txtFileName)
            {
                // 'using' closes the writer and the stream even when a write throws.
                using (FileStream fsw = new FileStream(txtFileName, FileMode.Append))
                using (StreamWriter sw = new StreamWriter(fsw))
                {
                    foreach (UnitStatistic unit in unitStatisList)
                    {
                        string[] columns = new string[]
                        {
                            index.ToString(),
                            unit.IndepValue.ToString(),
                            unit.SampleCountsEquValue.ToString(),
                            unit.WeightSumEquValue.ToString(),
                            unit.SampleCountsEquValueDepEqu1.ToString(),
                            unit.WeightSumEquValueDepEqu1.ToString(),
                            unit.AccuSampleCountsEquValue.ToString(),
                            unit.AccuWeightEquValue.ToString(),
                            unit.AccuSampleCountsEquValueDepEqu1.ToString(),
                            unit.AccuWeightEquValueDepEqu1.ToString(),
                            unit.PossiveWeight.ToString(),
                            unit.NegativeWeight.ToString(),
                            unit.VarianceOfPosWei.ToString(),
                            unit.VarianceOfNegWei.ToString(),
                            unit.DifferenceOfWei.ToString(),
                            unit.TStatistic.ToString()
                        };
                        sw.WriteLine(string.Join(" ", columns));
                    }
                }
            }
        }

        // Samples the statistics are computed from.
        private List<Sample> sampleList;

        // Number of independent variables.
        private int indepCount;

        /// <summary>Samples the statistics are computed from.</summary>
        public List<Sample> SampleList
        {
            get { return sampleList; }
            set { sampleList = value; }
        }

        /// <summary>Number of independent variables.</summary>
        public int IndepCount
        {
            get { return indepCount; }
            set { indepCount = value; }
        }

        /// <summary>Creates an instance with no sample list and an independent-variable count of 0.</summary>
        public SampleStatistic()
            : this(null, 0)
        {
        }

        /// <summary>Creates an instance over the given samples. The list is stored by reference.</summary>
        /// <param name="samples">Samples to analyse.</param>
        /// <param name="indepCnts">Number of independent variables.</param>
        public SampleStatistic(List<Sample> samples, int indepCnts)
        {
            this.sampleList = samples;
            this.indepCount = indepCnts;
        }

        /// <summary>
        /// Computes the statistics of the independent variable at <paramref name="idx"/> over all samples.
        /// </summary>
        /// <param name="idx">Index into each sample's <c>IndepValues</c>.</param>
        /// <returns>
        /// The statistics of the variable, or <c>null</c> when no sample contributes to them.
        /// </returns>
        public IndepStatistic IndepStatis(int idx)
        {
            // This overload used to be a copy of IndepStatis(int, List<double>) without the
            // exclusion filter. An empty exclusion list skips nothing, so delegating gives the
            // same result while keeping a single implementation.
            return IndepStatis(idx, new List<double>());
        }

        /// <summary>
        /// Computes the statistics of the independent variable at <paramref name="idx"/>, skipping every
        /// sample whose value for that variable is contained in <paramref name="specialValueList"/>.
        /// </summary>
        /// <param name="idx">Index into each sample's <c>IndepValues</c>.</param>
        /// <param name="specialValueList">Values to leave out; <c>null</c> is treated as an empty list.</param>
        /// <returns>
        /// One <see cref="UnitStatistic"/> per distinct value, sorted by ascending value, wrapped in an
        /// <see cref="IndepStatistic"/>; <c>null</c> when there is no sample list or no sample contributes.
        /// </returns>
        public IndepStatistic IndepStatis(int idx, List<double> specialValueList)
        {
            if (sampleList == null)
            {
                return null;
            }

            List<UnitStatistic> unitStatisList = CollectUnitStatistics(idx, specialValueList);
            if (unitStatisList.Count == 0)
            {
                return null;
            }

            // A single sort is enough; the list used to be re-sorted once per element.
            unitStatisList.Sort(
                delegate(UnitStatistic left, UnitStatistic right)
                {
                    return left.IndepValue.CompareTo(right.IndepValue);
                });

            AccumulateUnitStatistics(unitStatisList);
            ComputeWeightsOfEvidence(unitStatisList);

            return new IndepStatistic(idx, unitStatisList);
        }

        /// <summary>Groups the samples by their value at <paramref name="idx"/> and tallies counts and weights.</summary>
        private List<UnitStatistic> CollectUnitStatistics(int idx, List<double> specialValueList)
        {
            List<UnitStatistic> units = new List<UnitStatistic>();
            foreach (Sample sample in sampleList)
            {
                double value = sample.IndepValues[idx];
                if (specialValueList != null && specialValueList.Contains(value))
                {
                    continue;
                }

                bool depIsOne = sample.DepValue == 1.0;
                UnitStatistic unit = units.Find(
                    delegate(UnitStatistic candidate)
                    {
                        return candidate.IndepValue == value;
                    });
                if (unit == null)
                {
                    // First sample with this value: start its tallies from this sample.
                    unit = new UnitStatistic(idx, value, 1, sample.Weight, 0, 0.0);
                    if (depIsOne)
                    {
                        unit.SampleCountsEquValueDepEqu1 = 1;
                        unit.WeightSumEquValueDepEqu1 = sample.Weight;
                    }
                    units.Add(unit);
                }
                else
                {
                    unit.SampleCountsEquValue++;
                    unit.WeightSumEquValue += sample.Weight;
                    if (depIsOne)
                    {
                        unit.SampleCountsEquValueDepEqu1++;
                        unit.WeightSumEquValueDepEqu1 += sample.Weight;
                    }
                }
            }
            return units;
        }

        /// <summary>Fills the accumulated counts and weights of units that are already sorted by value.</summary>
        private static void AccumulateUnitStatistics(List<UnitStatistic> sortedUnits)
        {
            // Running totals replace the former nested loop; the additions happen in the same order.
            int count = 0;
            double weight = 0.0;
            int countDepEqu1 = 0;
            double weightDepEqu1 = 0.0;
            foreach (UnitStatistic unit in sortedUnits)
            {
                count += unit.SampleCountsEquValue;
                weight += unit.WeightSumEquValue;
                countDepEqu1 += unit.SampleCountsEquValueDepEqu1;
                weightDepEqu1 += unit.WeightSumEquValueDepEqu1;

                unit.AccuSampleCountsEquValue = count;
                unit.AccuWeightEquValue = weight;
                unit.AccuSampleCountsEquValueDepEqu1 = countDepEqu1;
                unit.AccuWeightEquValueDepEqu1 = weightDepEqu1;
            }

            // The former loop stopped at the first unit "greater than" the current value. That test
            // is never true for a NaN value, so a NaN unit accumulated every unit. Keep that result.
            foreach (UnitStatistic unit in sortedUnits)
            {
                if (double.IsNaN(unit.IndepValue))
                {
                    unit.AccuSampleCountsEquValue = count;
                    unit.AccuWeightEquValue = weight;
                    unit.AccuSampleCountsEquValueDepEqu1 = countDepEqu1;
                    unit.AccuWeightEquValueDepEqu1 = weightDepEqu1;
                }
            }
        }

        /// <summary>
        /// Fills the weights-of-evidence members (weights, variances, difference, T statistic) of every
        /// unit from its accumulated weights. A unit gets NaN for all of them when any of the six
        /// weights involved is closer to zero than 1e-10.
        /// </summary>
        private static void ComputeWeightsOfEvidence(List<UnitStatistic> sortedUnits)
        {
            const double smallValue = 0.0000000001;

            UnitStatistic last = sortedUnits[sortedUnits.Count - 1];
            // V: accumulated weight of the last unit; D+: its part with dependent value 1; D-: the rest.
            double VA = last.AccuWeightEquValue;
            double Dp = last.AccuWeightEquValueDepEqu1;
            double Dn = VA - Dp;

            foreach (UnitStatistic unit in sortedUnits)
            {
                // V+, V-, V+D+, V+D-, V-D+, V-D-
                double Vp = unit.AccuWeightEquValue;
                double Vn = VA - Vp;
                double VpDp = unit.AccuWeightEquValueDepEqu1;
                double VpDn = Vp - VpDp;
                double VnDp = Dp - VpDp;
                double VnDn = Vn - VnDp;

                bool degenerate =
                    Math.Abs(Dp) < smallValue || Math.Abs(Dn) < smallValue ||
                    Math.Abs(VpDp) < smallValue || Math.Abs(VpDn) < smallValue ||
                    Math.Abs(VnDp) < smallValue || Math.Abs(VnDn) < smallValue;
                if (degenerate)
                {
                    unit.PossiveWeight = double.NaN;
                    unit.NegativeWeight = double.NaN;
                    unit.VarianceOfPosWei = double.NaN;
                    unit.VarianceOfNegWei = double.NaN;
                    unit.DifferenceOfWei = double.NaN;
                    unit.TStatistic = double.NaN;
                    continue;
                }

                unit.PossiveWeight = Math.Log((VpDp / Dp) / (VpDn / Dn));
                unit.NegativeWeight = Math.Log((VnDp / Dp) / (VnDn / Dn));
                unit.VarianceOfPosWei = 1.0 / VpDp + 1.0 / VpDn;
                unit.VarianceOfNegWei = 1.0 / VnDp + 1.0 / VnDn;
                unit.DifferenceOfWei = unit.PossiveWeight - unit.NegativeWeight;
                unit.TStatistic = Math.Abs(unit.DifferenceOfWei) /
                    Math.Sqrt(unit.VarianceOfPosWei + unit.VarianceOfNegWei);
            }
        }

        /// <summary>
        /// Computes the statistics of every independent variable, from index 0 up to the
        /// independent-variable count.
        /// </summary>
        /// <returns>
        /// An array with one slot per variable; a slot holds whatever the single-index overload
        /// returned for that variable, which may be <c>null</c>.
        /// </returns>
        public IndepStatistic[] IndepStatis()
        {
            IndepStatistic[] perVariable = new IndepStatistic[indepCount];
            for (int column = 0; column < indepCount; ++column)
            {
                perVariable[column] = IndepStatis(column);
            }
            return perVariable;
        }

        /// <summary>
        /// Writes the sample list to a text file, replacing any existing file. The first line is a
        /// header ("Index Indep_0 ... Dep Weight"); each following line holds a sample's position in
        /// the list, its independent values, its dependent value and its weight, separated by spaces.
        /// </summary>
        /// <param name="txtFileName">Path of the file to create or overwrite.</param>
        public void OutputSampleList(string txtFileName)
        {
            // 'using' closes the writer and the stream even when a write throws.
            using (FileStream fsw = new FileStream(txtFileName, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fsw))
            {
                // One reusable builder instead of repeated string concatenation.
                StringBuilder line = new StringBuilder();

                // head
                line.Append("Index ");
                for (int i = 0; i < indepCount; ++i)
                {
                    line.Append("Indep_").Append(i.ToString()).Append(" ");
                }
                line.Append("Dep Weight");
                sw.WriteLine(line.ToString());

                // content
                for (int i = 0; i < sampleList.Count; ++i)
                {
                    Sample sample = sampleList[i];
                    line.Length = 0;
                    line.Append(i.ToString()).Append(" ");
                    for (int j = 0; j < indepCount; ++j)
                    {
                        line.Append(sample.IndepValues[j].ToString()).Append(" ");
                    }
                    line.Append(sample.DepValue.ToString());
                    line.Append(" ");
                    line.Append(sample.Weight.ToString());
                    sw.WriteLine(line.ToString());
                }
            }
        }

        /// <summary>
        /// Creates (or overwrites) a text file that contains only the header line naming the
        /// sixteen space-separated statistic columns.
        /// </summary>
        /// <param name="txtFileName">Path of the file to create or overwrite.</param>
        public void OutputStatisticHead(string txtFileName)
        {
            // 'using' closes the writer and the stream even when the write throws.
            using (FileStream fsw = new FileStream(txtFileName, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fsw))
            {
                sw.WriteLine(
                    "Index Value CountsEquValue WeightSumEquValue " +
                    "CountsEquValueDepEqu1 WeightSumEquValueDepEqu1 " +
                    "AccuCountsEquValue AccuWeightEquValue " +
                    "AccuCountsEquValueDepEqu1 AccuWeightEquValueDepEqu1 " +
                    "PosWeight NegWeight VarPosWei VarNegWei DifOfWei TStatistic");
            }
        }

        /// <summary>
        /// Counts the samples whose dependent value is exactly 0 and those whose dependent value is exactly 1.
        /// </summary>
        /// <returns>
        /// A two-element array: element 0 is the count for dependent value 0, element 1 the count
        /// for dependent value 1. Samples with any other dependent value are not counted.
        /// </returns>
        public int[] GetDepEqu0And1Nums()
        {
            int zeros = 0;
            int ones = 0;
            for (int i = 0; i < sampleList.Count; ++i)
            {
                double dep = sampleList[i].DepValue;
                if (dep == 0.0)
                {
                    ++zeros;
                }
                else if (dep == 1.0)
                {
                    ++ones;
                }
            }
            return new int[] { zeros, ones };
        }

        /// <summary>
        /// Fraction of the samples whose dependent value equals 1:
        /// (count of samples with dependent value 1) / (count of all samples).
        /// </summary>
        /// <returns>The ratio, or NaN when the sample list is empty.</returns>
        public double RatioOfDepEqu1Counts()
        {
            if (sampleList.Count == 0)
            {
                // The ratio is undefined without samples; the integer division used to throw here.
                return double.NaN;
            }

            int num = 0;
            foreach (Sample sample in sampleList)
            {
                if (sample.DepValue == 1.0)
                {
                    num++;
                }
            }

            // Convert before dividing: int / int truncates, which made the result always 0 or 1.
            return (double)num / sampleList.Count;
        }

        /// <summary>
        /// Merges samples that share the same dependent value and the same independent values at
        /// the positions listed in <paramref name="idxList"/>. The first sample of each group is
        /// copied into the result, and the weights of later matching samples are added to that copy.
        /// </summary>
        /// <param name="idxList">Indices of the independent values that must match exactly.</param>
        /// <returns>A new list with one sample per group, in order of first appearance.</returns>
        public List<Sample> CombineSamples(List<int> idxList)
        {
            List<Sample> combined = new List<Sample>();
            foreach (Sample current in sampleList)
            {
                bool merged = false;
                foreach (Sample candidate in combined)
                {
                    if (current.DepValue != candidate.DepValue)
                    {
                        continue;
                    }

                    bool sameKey = true;
                    foreach (int column in idxList)
                    {
                        if (current.IndepValues[column] != candidate.IndepValues[column])
                        {
                            sameKey = false;
                            break;
                        }
                    }
                    if (!sameKey)
                    {
                        continue;
                    }

                    // Same group: fold this sample's weight into the representative.
                    candidate.Weight += current.Weight;
                    merged = true;
                    break;
                }

                if (!merged)
                {
                    combined.Add(new Sample(current));
                }
            }
            return combined;
        }

        /// <summary>
        /// Merges samples using every independent variable (indices 0 up to the
        /// independent-variable count) as the matching key.
        /// </summary>
        /// <returns>The list produced by the index-list overload for all indices.</returns>
        public List<Sample> CombineSamples()
        {
            List<int> allColumns = new List<int>();
            int column = 0;
            while (column < indepCount)
            {
                allColumns.Add(column);
                ++column;
            }
            return CombineSamples(allColumns);
        }

        /// <summary>
        /// Builds a recoded copy of the sample list: in each copy the independent value at
        /// <paramref name="idx"/> becomes <paramref name="value0"/> when it is less than or equal to
        /// <paramref name="demarcation"/>, and <paramref name="value1"/> otherwise.
        /// </summary>
        /// <param name="idx">Index of the independent value to recode.</param>
        /// <param name="demarcation">Threshold; values equal to it count as the lower class.</param>
        /// <param name="value0">Replacement for values at or below the threshold.</param>
        /// <param name="value1">Replacement for all other values.</param>
        /// <returns>A new list of samples created through the <c>Sample</c> copy constructor.</returns>
        public List<Sample> IndepValueChange(int idx, double demarcation,
            double value0, double value1)
        {
            List<Sample> recoded = new List<Sample>();
            foreach (Sample source in sampleList)
            {
                Sample copy = new Sample(source);
                bool atOrBelow = copy.IndepValues[idx] <= demarcation;
                copy.IndepValues[idx] = atOrBelow ? value0 : value1;
                recoded.Add(copy);
            }
            return recoded;
        }

        /// <summary>
        /// Builds a recoded copy of the sample list with a separate class for one special value.
        /// In each copy the independent value at <paramref name="idx"/> becomes
        /// <paramref name="value2"/> when it equals <paramref name="specialValue"/>; otherwise it
        /// becomes <paramref name="value0"/> when it is less than or equal to
        /// <paramref name="demarcation"/>, and <paramref name="value1"/> in all remaining cases.
        /// </summary>
        /// <param name="idx">Index of the independent value to recode.</param>
        /// <param name="demarcation">Threshold; values equal to it count as the lower class.</param>
        /// <param name="value0">Replacement for values at or below the threshold.</param>
        /// <param name="value1">Replacement for values not at or below the threshold.</param>
        /// <param name="specialValue">Value that is checked first, using exact equality.</param>
        /// <param name="value2">Replacement for the special value.</param>
        /// <returns>A new list of samples created through the <c>Sample</c> copy constructor.</returns>
        public List<Sample> IndepValueChange(int idx, double demarcation,
            double value0, double value1, double specialValue, double value2)
        {
            List<Sample> recoded = new List<Sample>();
            foreach (Sample source in sampleList)
            {
                Sample copy = new Sample(source);
                double replacement;
                if (copy.IndepValues[idx] == specialValue)
                {
                    replacement = value2;
                }
                else if (copy.IndepValues[idx] <= demarcation)
                {
                    replacement = value0;
                }
                else
                {
                    replacement = value1;
                }
                copy.IndepValues[idx] = replacement;
                recoded.Add(copy);
            }
            return recoded;
        }

    }
}
